//===============================================================================
// Copyright (c) 2007-2016  Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2004-2006 ATI Technologies Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//  File Name:   Compress.cpp
//  Description: A library to compress/decompress textures
//
//  Revisions
//  Apr 2014    -    Refactored Library
//                   Code clean to support MSV 2010 and up
//////////////////////////////////////////////////////////////////////////////

#include "common.h"
#include "compressonator.h"
#include "compress.h"
#include <assert.h>
#include <algorithm>

#ifdef _WIN32
#include "windows.h"
#include "sysinfoapi.h"
#endif

// Reports how many processors the host machine exposes.
//
// Windows builds read the count from GetSystemInfo(); every other platform
// asks the C++ standard library (which is allowed to answer 0 when the value
// cannot be determined).
CMP_INT CMP_GetNumberOfProcessors() {
#ifdef _WIN32
    // Ask the OS for its system description and pick the processor count out of it.
    SYSTEM_INFO systemDescription;
    GetSystemInfo(&systemDescription);
    const CMP_INT processorCount = systemDescription.dwNumberOfProcessors;
    return processorCount;
#else
    // Alternative kept for reference: sysconf(_SC_NPROCESSORS_ONLN)
    const CMP_INT processorCount = std::thread::hardware_concurrency();
    return processorCount;
#endif
}


// Maps a texture format onto the codec type that handles it.
//
// Uncompressed pixel layouts map to CT_None, BCn names share the codec of
// their DXTn/ATIn equivalents, and any format without a case label below
// yields CT_Unknown.
CodecType GetCodecType(CMP_FORMAT format) {
    switch (format) {
    // ---- Uncompressed layouts: no codec involved ----
    case CMP_FORMAT_ARGB_2101010:
    case CMP_FORMAT_RGBA_8888:
    case CMP_FORMAT_BGRA_8888:
    case CMP_FORMAT_RGBA_8888_S:
    case CMP_FORMAT_ARGB_8888:
    case CMP_FORMAT_BGR_888:
    case CMP_FORMAT_RGB_888:
    case CMP_FORMAT_RG_8:
    case CMP_FORMAT_R_8:
    case CMP_FORMAT_ARGB_16:
    case CMP_FORMAT_RG_16:
    case CMP_FORMAT_R_16:
    case CMP_FORMAT_ABGR_16F:
    case CMP_FORMAT_ARGB_16F:
    case CMP_FORMAT_RG_16F:
    case CMP_FORMAT_R_16F:
    case CMP_FORMAT_ARGB_32F:
    case CMP_FORMAT_RG_32F:
    case CMP_FORMAT_R_32F:
    case CMP_FORMAT_RGBE_32F:
#ifdef ARGB_32_SUPPORT
    case CMP_FORMAT_ARGB_32:
    case CMP_FORMAT_RG_32:
    case CMP_FORMAT_R_32:
#endif  // ARGB_32_SUPPORT
        return CT_None;

    // ---- DXT1/3/5 and their BC1/2/3 names ----
    case CMP_FORMAT_DXT1:
    case CMP_FORMAT_BC1:
        return CT_DXT1;
    case CMP_FORMAT_DXT3:
    case CMP_FORMAT_BC2:
        return CT_DXT3;
    case CMP_FORMAT_DXT5:
    case CMP_FORMAT_BC3:
        return CT_DXT5;

    // ---- DXT5 channel-swizzled variants ----
    case CMP_FORMAT_DXT5_xGBR:
        return CT_DXT5_xGBR;
    case CMP_FORMAT_DXT5_RxBG:
        return CT_DXT5_RxBG;
    case CMP_FORMAT_DXT5_RBxG:
        return CT_DXT5_RBxG;
    case CMP_FORMAT_DXT5_xRBG:
        return CT_DXT5_xRBG;
    case CMP_FORMAT_DXT5_RGxB:
        return CT_DXT5_RGxB;
    case CMP_FORMAT_DXT5_xGxR:
        return CT_DXT5_xGxR;

    // ---- ATI1N/ATI2N and their BC4/BC5 names ----
    case CMP_FORMAT_ATI1N:
    case CMP_FORMAT_BC4:
        return CT_ATI1N;
    case CMP_FORMAT_BC4_S:
        return CT_ATI1N_S;
    case CMP_FORMAT_ATI2N:
        return CT_ATI2N;
    case CMP_FORMAT_ATI2N_XY:
    case CMP_FORMAT_BC5:  // Red & Green channels
        return CT_ATI2N_XY;
    case CMP_FORMAT_BC5_S:  // Red & Green channels
        return CT_ATI2N_XY_S;
    case CMP_FORMAT_ATI2N_DXT5:
        return CT_ATI2N_DXT5;

    // ---- BC6H / BC7 / ASTC ----
    case CMP_FORMAT_BC6H:
        return CT_BC6H;
    case CMP_FORMAT_BC6H_SF:
        return CT_BC6H_SF;
    case CMP_FORMAT_BC7:
        return CT_BC7;
    case CMP_FORMAT_ASTC:
        return CT_ASTC;

    // ---- ATC ----
    case CMP_FORMAT_ATC_RGB:
        return CT_ATC_RGB;
    case CMP_FORMAT_ATC_RGBA_Explicit:
        return CT_ATC_RGBA_Explicit;
    case CMP_FORMAT_ATC_RGBA_Interpolated:
        return CT_ATC_RGBA_Interpolated;

    // ---- ETC / ETC2 ----
    case CMP_FORMAT_ETC_RGB:
        return CT_ETC_RGB;
    case CMP_FORMAT_ETC2_RGB:
        return CT_ETC2_RGB;
    case CMP_FORMAT_ETC2_SRGB:
        return CT_ETC2_SRGB;
    case CMP_FORMAT_ETC2_RGBA:
        return CT_ETC2_RGBA;
    case CMP_FORMAT_ETC2_RGBA1:
        return CT_ETC2_RGBA1;
    case CMP_FORMAT_ETC2_SRGBA:
        return CT_ETC2_SRGBA;
    case CMP_FORMAT_ETC2_SRGBA1:
        return CT_ETC2_SRGBA1;

    // ---- Optional codecs, present only when enabled at build time ----
#ifdef USE_APC
    case CMP_FORMAT_APC:
        return CT_APC;
#endif
#ifdef USE_GTC
    case CMP_FORMAT_GTC:
        return CT_GTC;
#endif
#ifdef USE_BASIS
    case CMP_FORMAT_BASIS:
        return CT_BASIS;
#endif

    default:
        break;
    }

    // No case matched: the format is not known to this table.
    return CT_Unknown;
}

// Returns true when InFormat is one of the formats listed below: the 16F and
// 32F channel layouts, RGBE_32F, and the two BC6H variants. Every other
// format yields false.
bool IsFloatFormat(CMP_FORMAT InFormat) {
    switch (InFormat) {
    // 16-bit float layouts
    case CMP_FORMAT_ARGB_16F:
    case CMP_FORMAT_ABGR_16F:
    case CMP_FORMAT_RGBA_16F:
    case CMP_FORMAT_BGRA_16F:
    case CMP_FORMAT_RG_16F:
    case CMP_FORMAT_R_16F:
    // 32-bit float layouts
    case CMP_FORMAT_ARGB_32F:
    case CMP_FORMAT_ABGR_32F:
    case CMP_FORMAT_RGBA_32F:
    case CMP_FORMAT_BGRA_32F:
    case CMP_FORMAT_RGB_32F:
    case CMP_FORMAT_BGR_32F:
    case CMP_FORMAT_RG_32F:
    case CMP_FORMAT_R_32F:
    // BC6H variants and shared-exponent RGBE
    case CMP_FORMAT_BC6H:
    case CMP_FORMAT_BC6H_SF:
    case CMP_FORMAT_RGBE_32F:
        return true;

    default:
        return false;
    }
}

// Returns true when InFormat appears in the list of block-compressed formats
// below (ASTC, ATI, ATC, BCn, DXTn, ETC/ETC2, PVRTC, APC when USE_APC is
// defined, GTC and BASIS); false for everything else.
bool IsCompressedFormat(CMP_FORMAT InFormat)
{
    bool isCompressed = false;

    switch (InFormat)
    {
    case CMP_FORMAT_ASTC:
    case CMP_FORMAT_ATI1N:
    case CMP_FORMAT_ATI2N:
    case CMP_FORMAT_ATI2N_XY:
    case CMP_FORMAT_ATI2N_DXT5:
    case CMP_FORMAT_ATC_RGB:
    case CMP_FORMAT_ATC_RGBA_Explicit:
    case CMP_FORMAT_ATC_RGBA_Interpolated:
    case CMP_FORMAT_BC1:
    case CMP_FORMAT_BC2:
    case CMP_FORMAT_BC3:
    case CMP_FORMAT_BC4:
    case CMP_FORMAT_BC4_S:
    case CMP_FORMAT_BC5:
    case CMP_FORMAT_BC5_S:
    case CMP_FORMAT_BC6H:
    case CMP_FORMAT_BC6H_SF:
    case CMP_FORMAT_BC7:
    case CMP_FORMAT_DXT1:
    case CMP_FORMAT_DXT3:
    case CMP_FORMAT_DXT5:
    case CMP_FORMAT_DXT5_xGBR:
    case CMP_FORMAT_DXT5_RxBG:
    case CMP_FORMAT_DXT5_RBxG:
    case CMP_FORMAT_DXT5_xRBG:
    case CMP_FORMAT_DXT5_RGxB:
    case CMP_FORMAT_DXT5_xGxR:
    case CMP_FORMAT_ETC_RGB:
    case CMP_FORMAT_ETC2_RGB:
    case CMP_FORMAT_ETC2_SRGB:
    case CMP_FORMAT_ETC2_RGBA:
    case CMP_FORMAT_ETC2_RGBA1:
    case CMP_FORMAT_ETC2_SRGBA:
    case CMP_FORMAT_ETC2_SRGBA1:
    case CMP_FORMAT_PVRTC:
#ifdef USE_APC
    case CMP_FORMAT_APC:    //< APC Texture Compressor
#endif
    case CMP_FORMAT_GTC:    //< GTC   Fast Gradient Texture Compressor
    case CMP_FORMAT_BASIS:  //< BASIS compression
        isCompressed = true;
        break;

    default:
        // Anything not listed is treated as uncompressed.
        break;
    }

    return isCompressed;
}

// Returns the per-channel bit size this library associates with InFormat:
// 16 for the 16-bit integer/float layouts, BC6H and ARGB_2101010, 32 for the
// 32F layouts and RGBE_32F, and 8 for every other format.
CMP_BYTE FormatChannelBitSize(CMP_FORMAT InFormat)
{
    // Start from the fallback answer; the switch only overrides it.
    CMP_BYTE bitsPerChannel = (CMP_BYTE)(8);

    switch (InFormat)
    {
    case CMP_FORMAT_ARGB_16F:
    case CMP_FORMAT_ABGR_16F:
    case CMP_FORMAT_RGBA_16F:
    case CMP_FORMAT_BGRA_16F:
    case CMP_FORMAT_RG_16F:
    case CMP_FORMAT_R_16F:
    case CMP_FORMAT_ARGB_16:
    case CMP_FORMAT_ABGR_16:
    case CMP_FORMAT_RGBA_16:
    case CMP_FORMAT_BGRA_16:
    case CMP_FORMAT_RG_16:
    case CMP_FORMAT_R_16:
    case CMP_FORMAT_BC6H:
    case CMP_FORMAT_BC6H_SF:
    case CMP_FORMAT_ARGB_2101010:  // reported as 16 alongside the true 16-bit layouts
        bitsPerChannel = (CMP_BYTE)(16);
        break;

    case CMP_FORMAT_ARGB_32F:
    case CMP_FORMAT_ABGR_32F:
    case CMP_FORMAT_RGBA_32F:
    case CMP_FORMAT_BGRA_32F:
    case CMP_FORMAT_RGB_32F:
    case CMP_FORMAT_BGR_32F:
    case CMP_FORMAT_RG_32F:
    case CMP_FORMAT_R_32F:
    case CMP_FORMAT_RGBE_32F:
        bitsPerChannel = (CMP_BYTE)(32);
        break;

    default:
        break;
    }

    return bitsPerChannel;
}

// Determines whether the swizzle flag needs to be turned on for a destination
// format. True for the BC1-BC5 / DXT1-3-5 / ATI1N-ATI2N / ATC formats listed
// below, false for every other format.
bool NeedSwizzle(CMP_FORMAT destformat) {
    switch (destformat) {
    // BC1..BC3 and their DXTn names
    case CMP_FORMAT_BC1:
    case CMP_FORMAT_DXT1:        // same as BC1
    case CMP_FORMAT_BC2:
    case CMP_FORMAT_DXT3:        // same as BC2
    case CMP_FORMAT_BC3:
    case CMP_FORMAT_DXT5:        // same as BC3
    // BC4 / BC5 and their ATI names
    case CMP_FORMAT_BC4:
    case CMP_FORMAT_BC4_S:
    case CMP_FORMAT_ATI1N:       // same as BC4
    case CMP_FORMAT_BC5:
    case CMP_FORMAT_BC5_S:
    case CMP_FORMAT_ATI2N:       // Green & Red Channels
    case CMP_FORMAT_ATI2N_XY:    // same as ATI2N  with XY = Red & Green channels
    case CMP_FORMAT_ATI2N_DXT5:  // same as BC5
    // ATC
    case CMP_FORMAT_ATC_RGB:
    case CMP_FORMAT_ATC_RGBA_Explicit:
    case CMP_FORMAT_ATC_RGBA_Interpolated:
        return true;

    default:
        return false;
    }
}

// Limits a to the closed range [l, h]: values below l become l, values above
// h become h, anything else is returned unchanged.
inline float clamp(float a, float l, float h) {
    if (a < l) {
        return l;
    }
    if (a > h) {
        return h;
    }
    return a;
}

// Logarithmic knee curve: evaluates log(x * f + 1) / f in double precision
// and narrows the result to float.
inline float knee(double x, double f) {
    const double curveValue = log(x * f + 1.f) / f;
    return float(curveValue);
}

// Searches for the knee parameter f at which knee(x, f) is approximately y.
//
// The search first doubles an upper bound until knee(x, bound) is no longer
// above y, then narrows the bracket with 30 bisection steps and returns the
// midpoint of the final bracket.
float findKneeValue(float x, float y) {
    float lowerF = 0;
    float upperF = 1.f;

    // Phase 1: grow the bracket by doubling while the curve is still above y.
    while (knee(x, upperF) > y) {
        lowerF = upperF;
        upperF = upperF * 2.f;
    }

    // Phase 2: fixed number of bisection steps inside [lowerF, upperF].
    for (int remaining = 30; remaining > 0; --remaining) {
        const float middleF = (lowerF + upperF) / 2.f;
        const float middleY = knee(x, middleF);

        if (middleY < y) {
            upperF = middleF;
        } else {
            lowerF = middleF;
        }
    }

    return (lowerF + upperF) / 2.f;
}

// Converts dwcBlockSize bytes from cBlock into half-float bit patterns in
// hfsBlock. Each byte is divided by 255.0f before being turned into a
// CMP_HALF, whose bits() are stored.
//
// Asserts on a null pointer or a zero size; outside assert-enabled builds such
// input is ignored. The return value is always CMP_OK.
CMP_ERROR Byte2HalfShort(CMP_HALFSHORT* hfsBlock, CMP_BYTE* cBlock, CMP_DWORD dwcBlockSize) {
    assert(hfsBlock);
    assert(cBlock);
    assert(dwcBlockSize);

    // Nothing to convert when any argument is missing.
    if (!hfsBlock || !cBlock || !dwcBlockSize) {
        return CMP_OK;
    }

    for (CMP_DWORD idx = 0; idx < dwcBlockSize; ++idx) {
        const float normalized = float(cBlock[idx] / 255.0f);
        hfsBlock[idx] = CMP_HALF(normalized).bits();
    }

    return CMP_OK;
}

// Tone-maps a floating point RGBA image down to four bytes per pixel.
//
//   cBlock     - destination; receives R, G, B, A bytes for every pixel.
//   fBlock     - source samples. Read as CMP_HALF values when
//                srcTexture->format is CMP_FORMAT_ARGB_16F and as CMP_FLOAT
//                values when it is CMP_FORMAT_ARGB_32F. For any other format
//                no samples are consumed.
//   srcTexture - supplies dwWidth, dwHeight and format of the source.
//   destFormat - unused.
//   pOptions   - supplies fInputDefog, fInputExposure, fInputKneeLow,
//                fInputKneeHigh and fInputGamma.
//
// Always returns CMP_OK. A null cBlock, fBlock, srcTexture or pOptions asserts
// in assert-enabled builds and is otherwise ignored without touching cBlock.
CMP_ERROR Float2Byte(CMP_BYTE cBlock[], CMP_FLOAT* fBlock, CMP_Texture* srcTexture, CMP_FORMAT destFormat, const CMP_CompressOptions* pOptions) {
    (void)destFormat;
    assert(cBlock);
    assert(fBlock);
    assert(srcTexture);  // was assert(&srcTexture), which can never fail
    assert(pOptions);

    // srcTexture and pOptions are dereferenced below, so they are guarded
    // together with the two buffers.
    if (!cBlock || !fBlock || !srcTexture || !pOptions) {
        return CMP_OK;
    }

    CMP_HALF* hfData = (CMP_HALF*)fBlock;
    float r = 0, g = 0, b = 0, a = 0;

    // Per-image constants, computed once.
    const float kl = powf(2.f, pOptions->fInputKneeLow);
    const float f = findKneeValue(powf(2.f, pOptions->fInputKneeHigh) - kl, powf(2.f, 3.5f) - kl);
    const float luminance3f = powf(2.f, -3.5f);          // always assume max intensity is 1 and 3.5f darker for scale later
    const float invGamma = 1 / pOptions->fInputGamma;    // for gamma correction
    const float scale = (float)255.0 * powf(luminance3f, invGamma);
    // Exposure factor 2^(exposure + 2.47393); it does not depend on the pixel,
    // so it is evaluated once instead of once per pixel.
    const float exposeScale = powf(2.f, pOptions->fInputExposure + 2.47393f);

    // Runs one channel through the tone-mapping steps, updating it in place,
    // and returns the value clamped to [0, 255] as a byte:
    //  1) Compensate for fogging by subtracting defog (only when defog > 0).
    //  2) Multiply by the exposure factor.
    //  3) Values that are now 1.0 are "middle gray".
    //  4) Apply the knee function: values above 2^kneeLow are lowered along a
    //     logarithmic curve such that 2^kneeHigh maps to 2^3.5.
    //  5) Gamma-correct with the given exponent (simple power function).
    //  6) Scale so that middle gray lands 3.5 f-stops below the maximum.
    const auto toneMap = [&](float& channel, float gammaExponent) -> CMP_BYTE {
        if (pOptions->fInputDefog > 0.0) {
            channel = channel - pOptions->fInputDefog;
        }
        channel = channel * exposeScale;
        if (channel > kl) {
            channel = kl + knee(channel - kl, f);
        }
        channel = powf(channel, gammaExponent);
        channel *= scale;
        return (CMP_BYTE)clamp(channel, 0.f, 255.f);
    };

    int i = 0;
    for (unsigned int y = 0; y < srcTexture->dwHeight; y++) {
        for (unsigned int x = 0; x < srcTexture->dwWidth; x++) {
            // Channels are read in stored order. The destination-format
            // swizzle that used to exist here was permanently disabled
            // (its flag was hard-wired to false), so it has been removed.
            if (srcTexture->format == CMP_FORMAT_ARGB_16F) {
                r = (float)(hfData[0]);
                g = (float)(hfData[1]);
                b = (float)(hfData[2]);
                a = (float)(hfData[3]);
                hfData += 4;
            } else if (srcTexture->format == CMP_FORMAT_ARGB_32F) {
                r = (float)(fBlock[0]);
                g = (float)(fBlock[1]);
                b = (float)(fBlock[2]);
                a = (float)(fBlock[3]);
                fBlock += 4;
            }

            cBlock[i++] = toneMap(r, invGamma);
            cBlock[i++] = toneMap(g, invGamma);
            cBlock[i++] = toneMap(b, invGamma);
            // NOTE(review): alpha is raised to fInputGamma, not to its
            // reciprocal like the colour channels. Kept exactly as it was;
            // confirm whether this asymmetry is intended.
            cBlock[i++] = toneMap(a, pOptions->fInputGamma);
        }
    }

    return CMP_OK;
}

// Narrows a four-channel image from CMP_WORD samples to bytes.
//
//   cBlock     - destination; receives dwWidth * dwHeight * 4 bytes.
//   wBlock     - source; the same number of CMP_WORD samples is read.
//   srcTexture - supplies dwWidth and dwHeight.
//
// Every sample is divided by 257 (65535 / 257 == 255).
// Always returns CMP_OK. A null argument asserts in assert-enabled builds and
// is otherwise ignored without touching cBlock.
CMP_ERROR RGBA_Word2Byte(CMP_BYTE cBlock[], CMP_WORD* wBlock, CMP_Texture* srcTexture)
{
    assert(cBlock);
    assert(wBlock);
    assert(srcTexture);  // was assert(&srcTexture), which can never fail

    // srcTexture is dereferenced below, so it is guarded with the buffers.
    if (cBlock && wBlock && srcTexture)
    {
        // Source and destination advance in lockstep, so one index serves both.
        unsigned int sampleIndex = 0;
        for (unsigned int y = 0; y < srcTexture->dwHeight; y++)
        {
            for (unsigned int x = 0; x < srcTexture->dwWidth; x++)
            {
                // 4 channel data conversion from 16 bit to 8 bit
                for (int channel = 0; channel < 4; ++channel, ++sampleIndex)
                {
                    cBlock[sampleIndex] = (CMP_BYTE)(wBlock[sampleIndex] / 257);
                }
            }
        }
    }

    return CMP_OK;
}

// Rebiases a four-channel image of CMP_SBYTE samples into CMP_BYTE samples by
// adding 127 to every sample (alpha included).
//
//   cBlock     - destination; receives dwWidth * dwHeight * 4 bytes.
//   sBlock     - source; the same number of samples is read.
//   srcTexture - supplies dwWidth and dwHeight.
//
// Always returns CMP_OK. A null argument asserts in assert-enabled builds and
// is otherwise ignored without touching cBlock.
CMP_ERROR SByte2Byte(CMP_BYTE cBlock[], CMP_SBYTE* sBlock, CMP_Texture* srcTexture)
{
    assert(cBlock);
    assert(sBlock);
    assert(srcTexture);  // was assert(&srcTexture), which can never fail

    // srcTexture is dereferenced below, so it is guarded with the buffers.
    if (cBlock && sBlock && srcTexture)
    {
        // Source and destination advance in lockstep, so one index serves both.
        unsigned int sampleIndex = 0;
        for (unsigned int y = 0; y < srcTexture->dwHeight; y++)
        {
            for (unsigned int x = 0; x < srcTexture->dwWidth; x++)
            {
                // 4 channel conversion from signed to unsigned samples.
                // NOTE(review): assuming CMP_SBYTE is a signed 8-bit type and
                // CMP_BYTE an unsigned one, an input of -128 gives -1, which
                // wraps to 255 when stored - confirm this is intended.
                for (int channel = 0; channel < 4; ++channel, ++sampleIndex)
                {
                    cBlock[sampleIndex] = sBlock[sampleIndex] + 127;
                }
            }
        }
    }

    return CMP_OK;
}

// Rebiases a four-channel image of CMP_BYTE samples into CMP_SBYTE samples by
// subtracting 127 from every sample (alpha included).
//
//   sBlock     - destination; receives dwWidth * dwHeight * 4 samples.
//   cBlock     - source; the same number of bytes is read.
//   srcTexture - supplies dwWidth and dwHeight.
//
// Always returns CMP_OK. A null argument asserts in assert-enabled builds and
// is otherwise ignored without touching sBlock.
CMP_ERROR Byte2SByte(CMP_SBYTE sBlock[], CMP_BYTE* cBlock, CMP_Texture* srcTexture)
{
    assert(cBlock);
    assert(sBlock);
    assert(srcTexture);  // was assert(&srcTexture), which can never fail

    // srcTexture is dereferenced below, so it is guarded with the buffers.
    if (cBlock && sBlock && srcTexture)
    {
        // Source and destination advance in lockstep, so one index serves both.
        unsigned int sampleIndex = 0;
        for (unsigned int y = 0; y < srcTexture->dwHeight; y++)
        {
            for (unsigned int x = 0; x < srcTexture->dwWidth; x++)
            {
                // 4 channel conversion from unsigned to signed samples.
                // NOTE(review): assuming CMP_SBYTE is a signed 8-bit type, an
                // input of 255 gives 128, which does not fit the destination -
                // confirm the resulting wrap is intended.
                for (int channel = 0; channel < 4; ++channel, ++sampleIndex)
                {
                    sBlock[sampleIndex] = cBlock[sampleIndex] - 127;
                }
            }
        }
    }

    return CMP_OK;
}


// Translates a codec-level result into the library's CMP_ERROR:
// CE_OK -> CMP_OK, CE_Aborted -> CMP_ABORTED, and everything else
// (CE_Unknown included) -> CMP_ERR_GENERIC.
CMP_ERROR GetError(CodecError err) {
    if (err == CE_OK) {
        return CMP_OK;
    }
    if (err == CE_Aborted) {
        return CMP_ABORTED;
    }
    // CE_Unknown and any other value share the generic error.
    return CMP_ERR_GENERIC;
}

// Validates a texture descriptor before it is used.
//
//   pTexture - descriptor to check.
//   bSource  - selects which error family is reported: the *_SOURCE_* codes
//              when true, the *_DEST_* codes when false.
//
// Returns CMP_OK when every check passes. Each check also asserts, so
// assert-enabled builds stop at the first violation.
CMP_ERROR CheckTexture(const CMP_Texture* pTexture, bool bSource) {
    // Pick the two possible failure codes once, up front.
    const CMP_ERROR invalidTexture    = bSource ? CMP_ERR_INVALID_SOURCE_TEXTURE : CMP_ERR_INVALID_DEST_TEXTURE;
    const CMP_ERROR unsupportedFormat = bSource ? CMP_ERR_UNSUPPORTED_SOURCE_FORMAT : CMP_ERR_UNSUPPORTED_DEST_FORMAT;

    // The descriptor itself must exist...
    assert(pTexture);
    if (pTexture == NULL)
        return invalidTexture;

    // ...and carry the structure size this build expects.
    const bool sizeMatches = (pTexture->dwSize == sizeof(CMP_Texture));
    assert(sizeMatches);
    if (!sizeMatches)
        return invalidTexture;

    // Both dimensions must be positive.
    const bool hasWidth = (pTexture->dwWidth > 0);
    assert(hasWidth);
    if (!hasWidth)
        return invalidTexture;

    const bool hasHeight = (pTexture->dwHeight > 0);
    assert(hasHeight);
    if (!hasHeight)
        return invalidTexture;

    // The format must lie inside [CMP_FORMAT_RGBA_8888_S, CMP_FORMAT_MAX].
    const bool formatInRange = (pTexture->format >= CMP_FORMAT_RGBA_8888_S && pTexture->format <= CMP_FORMAT_MAX);
    assert(formatInRange);
    if (!formatInRange)
        return unsupportedFormat;

    // For the 4-byte-per-pixel formats checked here, a non-zero pitch must
    // cover at least dwWidth * 4 bytes.
    const bool pitchIsChecked = (pTexture->format == CMP_FORMAT_ARGB_8888 ||
                                 pTexture->format == CMP_FORMAT_RGBA_8888_S ||
                                 pTexture->format == CMP_FORMAT_ARGB_2101010);
    const bool pitchTooSmall  = (pTexture->dwPitch != 0 && pTexture->dwPitch < (pTexture->dwWidth * 4));
    assert(!(pitchIsChecked && pitchTooSmall));
    if (pitchIsChecked && pitchTooSmall)
        return unsupportedFormat;

    // Pixel storage must be present...
    assert(pTexture->pData);
    if (pTexture->pData == NULL)
        return invalidTexture;

    // ...and at least as large as CMP_CalculateBufferSize() requires.
    const CMP_DWORD requiredSize = CMP_CalculateBufferSize(pTexture);
    assert(pTexture->dwDataSize >= requiredSize);
    if (pTexture->dwDataSize < requiredSize)
        return invalidTexture;

    return CMP_OK;
}

// Compresses pSourceTexture into pDestTexture on the calling thread.
//
//   pSourceTexture - source image descriptor (format, dimensions, data).
//   pDestTexture   - destination descriptor; its pData receives the output.
//   pOptions       - optional settings. They are applied only when the pointer
//                    is non-null and dwSize matches sizeof(CMP_CompressOptions).
//   pFeedbackProc  - forwarded unchanged to the codec's Compress().
//   destType       - codec to instantiate through CreateCodec().
//
// Returns CMP_ERR_UNABLE_TO_INIT_CODEC when no codec could be created,
// CMP_ERR_GENERIC when either codec buffer could not be created, otherwise
// the codec result translated by GetError().
//
// Neither texture pointer is checked for null here.
CMP_ERROR CompressTexture(const CMP_Texture* pSourceTexture, CMP_Texture* pDestTexture, const CMP_CompressOptions* pOptions, CMP_Feedback_Proc pFeedbackProc, CodecType destType) {
    // Compressing
    CCodec* pCodec = CreateCodec(destType);
    assert(pCodec);
    if(pCodec == NULL)
        return CMP_ERR_UNABLE_TO_INIT_CODEC;

    // Stays false unless the valid-options branch below decides otherwise.
    CMP_BOOL swizzleSrcBuffer = false;

    // Have we got valid options ?
    if(pOptions && pOptions->dwSize == sizeof(CMP_CompressOptions)) {
        // Set weightings ?
        // Channel weighting is enabled only if requested and at least one weight is positive;
        // each weight is then raised to MINIMUM_WEIGHT_VALUE if it is not above it.
        if(pOptions->bUseChannelWeighting && (pOptions->fWeightingRed > 0.0 || pOptions->fWeightingGreen > 0.0 || pOptions->fWeightingBlue > 0.0)) {
            pCodec->SetParameter("UseChannelWeighting", (CMP_DWORD) 1);
            pCodec->SetParameter("WeightR",
                                 pOptions->fWeightingRed > MINIMUM_WEIGHT_VALUE ?
                                 (CODECFLOAT) pOptions->fWeightingRed : MINIMUM_WEIGHT_VALUE);
            pCodec->SetParameter("WeightG",
                                 pOptions->fWeightingGreen > MINIMUM_WEIGHT_VALUE ?
                                 (CODECFLOAT) pOptions->fWeightingGreen : MINIMUM_WEIGHT_VALUE);
            pCodec->SetParameter("WeightB",
                                 pOptions->fWeightingBlue > MINIMUM_WEIGHT_VALUE ?
                                 (CODECFLOAT) pOptions->fWeightingBlue : MINIMUM_WEIGHT_VALUE);
        }
        pCodec->SetParameter("UseAdaptiveWeighting", (CMP_DWORD) pOptions->bUseAdaptiveWeighting);
        pCodec->SetParameter("DXT1UseAlpha", (CMP_DWORD) pOptions->bDXT1UseAlpha);
        pCodec->SetParameter("AlphaThreshold", (CMP_DWORD) pOptions->nAlphaThreshold);
        if (pOptions->bUseRefinementSteps)
            pCodec->SetParameter("RefineSteps", (CMP_DWORD) pOptions->nRefinementSteps);
        // Quality override for the DXTn & ATInN codecs: a non-default fquality sets "Quality"
        // and replaces the caller's nCompressionSpeed with a speed derived from it.
        if (pOptions->fquality != AMD_CODEC_QUALITY_DEFAULT) {
            pCodec->SetParameter("Quality", (CODECFLOAT)pOptions->fquality);
            // On builds without _WIN64 the speed depends on fquality (< 0.3 SuperFast, < 0.6 Fast,
            // otherwise Normal). With _WIN64 defined the if/else chain is compiled out and the
            // statement after #endif always selects Normal.
#ifndef _WIN64
            if (pOptions->fquality < 0.3)
                pCodec->SetParameter("CompressionSpeed", (CMP_DWORD)CMP_Speed_SuperFast);
            else if (pOptions->fquality < 0.6)
                pCodec->SetParameter("CompressionSpeed", (CMP_DWORD)CMP_Speed_Fast);
            else
#endif
                pCodec->SetParameter("CompressionSpeed", (CMP_DWORD)CMP_Speed_Normal);
        } else
            pCodec->SetParameter("CompressionSpeed", (CMP_DWORD)pOptions->nCompressionSpeed);


        // Codec-specific parameters. Codec types without a case get nothing extra.
        switch(destType) {
        case CT_BC7:
            pCodec->SetParameter("MultiThreading", (CMP_DWORD) !pOptions->bDisableMultiThreading);

            // A single thread is requested when multithreading is disabled.
            if (!pOptions->bDisableMultiThreading)
                pCodec->SetParameter("NumThreads", (CMP_DWORD) pOptions->dwnumThreads);
            else
                pCodec->SetParameter("NumThreads", (CMP_DWORD) 1);

            pCodec->SetParameter("ModeMask", (CMP_DWORD) pOptions->dwmodeMask);
            pCodec->SetParameter("ColourRestrict", (CMP_DWORD) pOptions->brestrictColour);
            pCodec->SetParameter("AlphaRestrict", (CMP_DWORD) pOptions->brestrictAlpha);
            // Quality is set here unconditionally, even when fquality is the default value.
            pCodec->SetParameter("Quality", (CODECFLOAT) pOptions->fquality);
            break;
#ifdef USE_BASIS
        case CT_BASIS:
#endif
        case CT_ASTC:
            pCodec->SetParameter("Quality", (CODECFLOAT)pOptions->fquality);
            if (!pOptions->bDisableMultiThreading)
                pCodec->SetParameter("NumThreads", (CMP_DWORD)pOptions->dwnumThreads);
            else
                pCodec->SetParameter("NumThreads", (CMP_DWORD)1);
            break;
#ifdef USE_APC
        case CT_APC:
#endif
#ifdef USE_GTC
        case CT_GTC:
#endif
        case CT_BC6H:
        case CT_BC6H_SF:
            pCodec->SetParameter("Quality", (CODECFLOAT)pOptions->fquality);
            if (!pOptions->bDisableMultiThreading)
                pCodec->SetParameter("NumThreads", (CMP_DWORD)pOptions->dwnumThreads);
            else
                pCodec->SetParameter("NumThreads", (CMP_DWORD)1);
#ifdef _DEBUG
            // napatel : remove this after
            // pCodec->SetParameter("NumThreads", (CMP_DWORD)1);
#endif
            break;
        }

        // This will eventually replace the above code for setting codec options
        // Generic string command/parameter pairs; at most AMD_MAX_CMDS entries are forwarded.
        // They are applied after the settings above.
        if (pOptions->NumCmds > 0) {
            int maxCmds=pOptions->NumCmds;
            if (pOptions->NumCmds > AMD_MAX_CMDS) maxCmds = AMD_MAX_CMDS;
            for (int i=0; i<maxCmds; i++)
                pCodec->SetParameter(pOptions->CmdSet[i].strCommand, (CMP_CHAR*)pOptions->CmdSet[i].strParameter);
        }

        // GPUOpen issue # 59 fix
        // When the destination format needs swizzling, the source buffer is swizzled unless it is
        // already a BGRA8888 / BGR888 buffer.
        // NOTE(review): this decision lives inside the valid-options branch, so with missing or
        // mismatched options the swizzle flag stays false - confirm that is intended.
        CodecBufferType srcBufferType = GetCodecBufferType(pSourceTexture->format);
        if (NeedSwizzle(pDestTexture->format)) {
            switch (srcBufferType) {
            case CBT_BGRA8888:
            case CBT_BGR888:
                swizzleSrcBuffer = false;
                break;
            default:
                swizzleSrcBuffer = true;
                break;
            }
        }
    }

    // Looked up again here because the variable of the same name above is local to the options branch.
    CodecBufferType srcBufferType = GetCodecBufferType(pSourceTexture->format);

    // Both buffers are constructed over the pData pointers supplied by the textures.
    CCodecBuffer* pSrcBuffer  = CreateCodecBuffer(srcBufferType,
                                pSourceTexture->nBlockWidth, pSourceTexture->nBlockHeight, pSourceTexture->nBlockDepth,
                                pSourceTexture->dwWidth, pSourceTexture->dwHeight, pSourceTexture->dwPitch, pSourceTexture->pData,
                                pSourceTexture->dwDataSize);

    // The destination buffer is created by the codec itself.
    CCodecBuffer* pDestBuffer = pCodec->CreateBuffer(
                                    pDestTexture->nBlockWidth, pDestTexture->nBlockHeight, pDestTexture->nBlockDepth,
                                    pDestTexture->dwWidth, pDestTexture->dwHeight, pDestTexture->dwPitch, pDestTexture->pData,
                                    pDestTexture->dwDataSize);

    assert(pSrcBuffer);
    assert(pDestBuffer);
    // If either buffer is missing, release whatever was created and fail.
    if(pSrcBuffer == NULL || pDestBuffer == NULL) {
        SAFE_DELETE(pCodec);
        SAFE_DELETE(pSrcBuffer);
        SAFE_DELETE(pDestBuffer);
        return CMP_ERR_GENERIC;
    }

    pSrcBuffer->SetFormat(pSourceTexture->format);
    pDestBuffer->SetFormat(pDestTexture->format);


    // GPUOpen issue # 59 and #67 fix
    pSrcBuffer->m_bSwizzle = swizzleSrcBuffer;
    // The compression call is bracketed by the project's floating point exception macros.
    DISABLE_FP_EXCEPTIONS;
    CodecError err = pCodec->Compress(*pSrcBuffer, *pDestBuffer, pFeedbackProc);
    RESTORE_FP_EXCEPTIONS;

    // Release the codec and both buffer objects before reporting the result.
    SAFE_DELETE(pCodec);
    SAFE_DELETE(pSrcBuffer);
    SAFE_DELETE(pDestBuffer);

    return GetError(err);
}

#ifdef THREADED_COMPRESS

// Work item handed to one compression thread: the codec to run, the source
// and destination buffers it operates on, the feedback callback, and the
// slot the thread writes its result into.
//
// The object owns m_pCodec, m_pSrcBuffer and m_pDestBuffer: the destructor
// deletes all three. Copying is therefore disabled, since a copy would end up
// deleting the same pointers a second time.
class CATICompressThreadData {
  public:
    CATICompressThreadData();
    ~CATICompressThreadData();

    // Non-copyable: see the ownership note above.
    CATICompressThreadData(const CATICompressThreadData&) = delete;
    CATICompressThreadData& operator=(const CATICompressThreadData&) = delete;

    CCodec* m_pCodec;                   // owned; deleted by the destructor
    CCodecBuffer* m_pSrcBuffer;         // owned; deleted by the destructor
    CCodecBuffer* m_pDestBuffer;        // owned; deleted by the destructor
    CMP_Feedback_Proc m_pFeedbackProc;  // not owned; passed on to Compress()
    CodecError m_errorCode;             // result stored by ThreadedCompressProc
};

// Starts with no codec, no buffers, no callback and a CE_OK result.
CATICompressThreadData::CATICompressThreadData() : m_pCodec(NULL), m_pSrcBuffer(NULL), m_pDestBuffer(NULL),
    m_pFeedbackProc(NULL),
    m_errorCode( CE_OK ) {
}

// Releases the codec and both buffers.
CATICompressThreadData::~CATICompressThreadData() {
    SAFE_DELETE(m_pCodec);
    SAFE_DELETE(m_pSrcBuffer);
    SAFE_DELETE(m_pDestBuffer);
}

void ThreadedCompressProc(void *lpParameter) {
    CATICompressThreadData *pThreadData = (CATICompressThreadData*) lpParameter;
    DISABLE_FP_EXCEPTIONS;
    CodecError err = pThreadData->m_pCodec->Compress(*pThreadData->m_pSrcBuffer, *pThreadData->m_pDestBuffer, pThreadData->m_pFeedbackProc);
    RESTORE_FP_EXCEPTIONS;
    pThreadData->m_errorCode = err;
}

CMP_ERROR ThreadedCompressTexture(const CMP_Texture* pSourceTexture, CMP_Texture* pDestTexture, const CMP_CompressOptions* pOptions, CMP_Feedback_Proc pFeedbackProc, CodecType destType) {
    // Note function should not be called for the following Codecs....
    if (destType == CT_BC7)     return CMP_ABORTED;
#ifdef USE_APC
    if (destType == CT_APC)
        return CMP_ABORTED;
#endif
#ifdef USE_GTC
    if (destType == CT_GTC)     return CMP_ABORTED;
#endif
#ifdef USE_BASIS
    if (destType == CT_BASIS)   return CMP_ABORTED;
#endif
    if (destType == CT_ASTC)  return CMP_ABORTED;

    CMP_DWORD dwMaxThreadCount = cmp_minT(f_dwProcessorCount, MAX_THREADS);
    CMP_DWORD dwLinesRemaining = pDestTexture->dwHeight;
    CMP_BYTE* pSourceData = pSourceTexture->pData;
    CMP_BYTE* pDestData = pDestTexture->pData;
    CMP_BOOL swizzleSrcBuffer = false;

#ifdef _DEBUG
    if (
        (pDestTexture->format == CMP_FORMAT_ETC2_RGBA) ||
        (pDestTexture->format == CMP_FORMAT_ETC2_RGBA1)
    )
        dwMaxThreadCount = 1;
#endif

    CATICompressThreadData aThreadData[MAX_THREADS];
    std::thread ahThread[MAX_THREADS];

    CMP_DWORD dwThreadCount = 0;
    for(CMP_DWORD dwThread = 0; dwThread < dwMaxThreadCount; dwThread++) {
        CATICompressThreadData& threadData = aThreadData[dwThread];

        // Compressing
        threadData.m_pCodec = CreateCodec(destType);
        assert(threadData.m_pCodec);
        if(threadData.m_pCodec == NULL)
            return CMP_ERR_UNABLE_TO_INIT_CODEC;

        // Have we got valid options ?
        if(pOptions && pOptions->dwSize == sizeof(CMP_CompressOptions)) {
            // Set weightings ?
            if(pOptions->bUseChannelWeighting && (pOptions->fWeightingRed > 0.0 || pOptions->fWeightingGreen > 0.0 || pOptions->fWeightingBlue > 0.0)) {
                threadData.m_pCodec->SetParameter("UseChannelWeighting", (CMP_DWORD) 1);
                threadData.m_pCodec->SetParameter("WeightR",
                                                  pOptions->fWeightingRed > MINIMUM_WEIGHT_VALUE ?
                                                  (CODECFLOAT) pOptions->fWeightingRed : MINIMUM_WEIGHT_VALUE);
                threadData.m_pCodec->SetParameter("WeightG",
                                                  pOptions->fWeightingGreen > MINIMUM_WEIGHT_VALUE ?
                                                  (CODECFLOAT) pOptions->fWeightingGreen : MINIMUM_WEIGHT_VALUE);
                threadData.m_pCodec->SetParameter("WeightB",
                                                  pOptions->fWeightingBlue > MINIMUM_WEIGHT_VALUE ?
                                                  (CODECFLOAT) pOptions->fWeightingBlue : MINIMUM_WEIGHT_VALUE);
            }
            threadData.m_pCodec->SetParameter("UseAdaptiveWeighting", (CMP_DWORD) pOptions->bUseAdaptiveWeighting);
            threadData.m_pCodec->SetParameter("DXT1UseAlpha", (CMP_DWORD) pOptions->bDXT1UseAlpha);
            threadData.m_pCodec->SetParameter("AlphaThreshold", (CMP_DWORD) pOptions->nAlphaThreshold);
            threadData.m_pCodec->SetParameter("RefineSteps", (CMP_DWORD) pOptions->nRefinementSteps);
            threadData.m_pCodec->SetParameter("Quality", (CODECFLOAT)pOptions->fquality);

            // New override to that set quality if compresion for DXTn & ATInN codecs
            if (pOptions->fquality != AMD_CODEC_QUALITY_DEFAULT) {
                if (pOptions->fquality < 0.3)
                    threadData.m_pCodec->SetParameter("CompressionSpeed", (CMP_DWORD)CMP_Speed_SuperFast);
                else if (pOptions->fquality < 0.6)
                    threadData.m_pCodec->SetParameter("CompressionSpeed", (CMP_DWORD)CMP_Speed_Fast);
                else
                    threadData.m_pCodec->SetParameter("CompressionSpeed", (CMP_DWORD)CMP_Speed_Normal);
            } else
                threadData.m_pCodec->SetParameter("CompressionSpeed", (CMP_DWORD)pOptions->nCompressionSpeed);



            switch(destType) {
            case CT_BC6H:
                // Reserved
                break;
            }

            // This will eventually replace the above code for setting codec options
            // It is currently implemented with BC6H and can be expanded to other codec
            if (pOptions->NumCmds > 0) {
                int maxCmds = pOptions->NumCmds;
                if (pOptions->NumCmds > AMD_MAX_CMDS) maxCmds = AMD_MAX_CMDS;
                for (int i = 0; i<maxCmds; i++)
                    threadData.m_pCodec->SetParameter(pOptions->CmdSet[i].strCommand, (CMP_CHAR*)pOptions->CmdSet[i].strParameter);
            }


        }

        CodecBufferType srcBufferType = GetCodecBufferType(pSourceTexture->format);

        // GPUOpen issue # 59 fix
        if (NeedSwizzle(pDestTexture->format)) {
            switch (srcBufferType) {
            case CBT_BGRA8888:
            case CBT_BGR888:
                swizzleSrcBuffer = false;
                break;
            default:
                swizzleSrcBuffer = true;
                break;
            }
        }

        CMP_DWORD dwThreadsRemaining = dwMaxThreadCount - dwThread;
        CMP_DWORD dwHeight = 0;
        if(dwThreadsRemaining > 1) {
            CMP_DWORD dwBlockHeight = threadData.m_pCodec->GetBlockHeight();
            dwHeight = dwLinesRemaining / dwThreadsRemaining;
            dwHeight = cmp_minT(((dwHeight + dwBlockHeight - 1) / dwBlockHeight) * dwBlockHeight, dwLinesRemaining); // Round by block height
            dwLinesRemaining -= dwHeight;
        } else
            dwHeight = dwLinesRemaining;

        if(dwHeight > 0) {

            threadData.m_pSrcBuffer = CreateCodecBuffer(srcBufferType,
                                      pSourceTexture->nBlockWidth, pSourceTexture->nBlockHeight, pSourceTexture->nBlockDepth,
                                      pSourceTexture->dwWidth, dwHeight, pSourceTexture->dwPitch, pSourceData,
                                      pSourceTexture->dwDataSize);
            threadData.m_pSrcBuffer->SetFormat(pSourceTexture->format);

            threadData.m_pDestBuffer = threadData.m_pCodec->CreateBuffer(
                                           pDestTexture->nBlockWidth, pDestTexture->nBlockHeight, pDestTexture->nBlockDepth,
                                           pDestTexture->dwWidth, dwHeight, pDestTexture->dwPitch, pDestData,
                                           pDestTexture->dwDataSize);
            threadData.m_pDestBuffer->SetFormat(pDestTexture->format);

            pSourceData += CalcBufferSize(pSourceTexture->format, pSourceTexture->dwWidth, dwHeight, pSourceTexture->dwPitch, pSourceTexture->nBlockWidth, pSourceTexture->nBlockHeight);
            pDestData += CalcBufferSize(destType, pDestTexture->dwWidth, dwHeight, pDestTexture->nBlockWidth, pDestTexture->nBlockHeight);

            assert(threadData.m_pSrcBuffer);
            assert(threadData.m_pDestBuffer);
            if(threadData.m_pSrcBuffer == NULL || threadData.m_pDestBuffer == NULL)
                return CMP_ERR_GENERIC;

            threadData.m_pSrcBuffer->m_bSwizzle = swizzleSrcBuffer;
            threadData.m_pFeedbackProc = pFeedbackProc;

            ahThread[dwThreadCount++] = std::thread(ThreadedCompressProc, &threadData);
        }
    }

    for ( CMP_DWORD dwThread = 0; dwThread < dwThreadCount; dwThread++ ) {
        std::thread& curThread = ahThread[dwThread];

        curThread.join();
    }

    CodecError err = CE_OK;
    for(CMP_DWORD dwThread = 0; dwThread < dwThreadCount; dwThread++) {
        CATICompressThreadData& threadData = aThreadData[dwThread];

        if(err == CE_OK)
            err = threadData.m_errorCode;

        ahThread[dwThread] = std::thread();
    }

    return GetError(err);
}
#endif // THREADED_COMPRESS

